import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import HTML, display
import sqlite3
import seaborn as sns
import plotly.express as px

results = pd.read_csv('data/results.csv', index_col=0, na_values="\\N")
races = pd.read_csv('data/races.csv', index_col=0, na_values="\\N")
drivers = pd.read_csv('data/drivers.csv', index_col=0, na_values="\\N")
constructors = pd.read_csv('data/constructors.csv', index_col=0, na_values="\\N")
pitStops = pd.read_csv('data/pit_stops.csv', index_col=0, na_values="\\N")
lapTimes = pd.read_csv('data/lap_times.csv', index_col=0, na_values="\\N")
circuits = pd.read_csv('data/circuits.csv', index_col=0, na_values="\\N")
results
| raceId | driverId | constructorId | number | grid | position | positionText | positionOrder | points | laps | time | milliseconds | fastestLap | rank | fastestLapTime | fastestLapSpeed | statusId | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| resultId | |||||||||||||||||
| 1 | 18 | 1 | 1 | 22.0 | 1 | 1.0 | 1 | 1 | 10.0 | 58 | 1:34:50.616 | 5690616.0 | 39.0 | 2.0 | 1:27.452 | 218.300 | 1 |
| 2 | 18 | 2 | 2 | 3.0 | 5 | 2.0 | 2 | 2 | 8.0 | 58 | +5.478 | 5696094.0 | 41.0 | 3.0 | 1:27.739 | 217.586 | 1 |
| 3 | 18 | 3 | 3 | 7.0 | 7 | 3.0 | 3 | 3 | 6.0 | 58 | +8.163 | 5698779.0 | 41.0 | 5.0 | 1:28.090 | 216.719 | 1 |
| 4 | 18 | 4 | 4 | 5.0 | 11 | 4.0 | 4 | 4 | 5.0 | 58 | +17.181 | 5707797.0 | 58.0 | 7.0 | 1:28.603 | 215.464 | 1 |
| 5 | 18 | 5 | 1 | 23.0 | 3 | 5.0 | 5 | 5 | 4.0 | 58 | +18.014 | 5708630.0 | 43.0 | 1.0 | 1:27.418 | 218.385 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 25461 | 1076 | 849 | 3 | 6.0 | 18 | 16.0 | 16 | 16 | 0.0 | 57 | NaN | NaN | 49.0 | 18.0 | 1:23.882 | 226.518 | 11 |
| 25462 | 1076 | 4 | 214 | 14.0 | 10 | 17.0 | 17 | 17 | 0.0 | 57 | NaN | NaN | 57.0 | 2.0 | 1:20.846 | 235.024 | 11 |
| 25463 | 1076 | 830 | 9 | 1.0 | 2 | NaN | R | 18 | 0.0 | 38 | NaN | NaN | 37.0 | 6.0 | 1:21.677 | 232.633 | 9 |
| 25464 | 1076 | 20 | 117 | 5.0 | 17 | NaN | R | 19 | 0.0 | 22 | NaN | NaN | 17.0 | 19.0 | 1:25.189 | 223.042 | 3 |
| 25465 | 1076 | 832 | 6 | 55.0 | 9 | NaN | R | 20 | 0.0 | 1 | NaN | NaN | NaN | 0.0 | NaN | NaN | 20 |
25460 rows × 17 columns

conn = sqlite3.connect("f1.db")
cursor = conn.cursor
circuits.to_sql("circuits", con = conn, if_exists='replace')
constructors.to_sql("constructors", con = conn, if_exists='replace')
drivers.to_sql("drivers", con = conn, if_exists='replace')
lapTimes.to_sql("lapTimes", con = conn, if_exists='replace')
pitStops.to_sql("pitStops", con = conn, if_exists='replace')
races.to_sql("races", con = conn, if_exists='replace')
results.to_sql("results", con = conn, if_exists='replace')

track_num = pd.read_sql_query("""
SELECT location, COUNT(location) as races_hosted FROM circuits C
JOIN races R
ON R.circuitId = C.circuitId
WHERE date <="2022-03-01"
GROUP BY 1
ORDER BY 2 desc
LIMIT 10
""", conn)
track_num
| location | races_hosted | |
|---|---|---|
| 0 | Monza | 72 |
| 1 | Silverstone | 53 |
| 2 | Montreal | 37 |
| 3 | Spielberg | 29 |
| 4 | Monte-Carlo | 29 |
| 5 | Nürburg | 27 |
| 6 | Spa | 25 |
| 7 | Budapest | 25 |
| 8 | Suzuka | 24 |
| 9 | Montmeló | 23 |
#plotting bar plot
px.bar(track_num, y='location', x='races_hosted',labels={'location':'Location', 'races_hosted':'Number of Races Hosted'}, text='races_hosted', title = 'Top 10 tracks since 1950')
#fig.update_traces(textfont_size=13, textangle=0, textposition="outside")
#fig.show()
df = pd.read_sql_query("""
SELECT D.driverId, forename as name, SUM(points) as Total_points
FROM drivers D
JOIN results R
ON D.driverId = R.driverId
GROUP BY 2
ORDER BY 3 desc
limit 10""", conn)
df
| driverId | name | Total_points | |
|---|---|---|---|
| 0 | 1 | Lewis | 4191.5 |
| 1 | 20 | Sebastian | 3061.0 |
| 2 | 3 | Nico | 2115.5 |
| 3 | 4 | Fernando | 1982.0 |
| 4 | 8 | Kimi | 1873.0 |
| 5 | 822 | Valtteri | 1743.0 |
| 6 | 349 | Max | 1575.5 |
| 7 | 30 | Michael | 1573.0 |
| 8 | 817 | Daniel | 1281.0 |
| 9 | 18 | Jenson | 1235.0 |
country_num = pd.read_sql_query("""
SELECT country, COUNT(country) as Races_hosted FROM circuits C
JOIN races R
ON R.circuitId = C.circuitId
WHERE date <="2022-03-01"
GROUP BY 1
ORDER BY 2 desc
limit 10
""", conn)
country_num
| country | Races_hosted | |
|---|---|---|
| 0 | Italy | 92 |
| 1 | UK | 70 |
| 2 | France | 48 |
| 3 | USA | 44 |
| 4 | Canada | 42 |
| 5 | Germany | 39 |
| 6 | Spain | 35 |
| 7 | Belgium | 34 |
| 8 | Australia | 30 |
| 9 | Monaco | 29 |
px.bar(country_num, x='country', y='Races_hosted',labels={'country':'Location', 'Races_hosted':'Number of Races Hosted'}, title = 'Number of races hosted per country since 1950', color = 'Races_hosted',)
won_races = pd.read_sql_query("""
SELECT C.name, COUNT(raceId) as Number_of_wins FROM constructors C
JOIN results R
ON R.constructorId = C.constructorId
WHERE r.position = 1
GROUP BY 1
ORDER by 2 desc
limit 10
""", conn)
won_races
| name | Number_of_wins | |
|---|---|---|
| 0 | Ferrari | 241 |
| 1 | McLaren | 179 |
| 2 | Mercedes | 124 |
| 3 | Williams | 114 |
| 4 | Red Bull | 76 |
| 5 | Team Lotus | 45 |
| 6 | Renault | 35 |
| 7 | Benetton | 27 |
| 8 | Tyrrell | 23 |
| 9 | Brabham | 23 |
px.bar(won_races, x='name', y='Number_of_wins', text='Number_of_wins', title = 'Teams with highest number of race wins')
championship_wins = pd.read_sql_query("""
WITH Season_total AS (
SELECT sub_q2.constructorId, C.name as winner, sub_q2.year, MAX(sub_q2.points) as Season_points
FROM
(SELECT sub_q1.constructorId, sub_q1.year, SUM(sub_q1.pts) as points
FROM
(SELECT Re.raceId, Re.constructorId,R.year,SUM(Re.points) as pts
FROM results as Re
JOIN races as R
ON Re.raceId=R.raceId
GROUP BY Re.raceId, Re.constructorId) as sub_q1
GROUP BY sub_q1.constructorId, sub_q1.year) as sub_q2
JOIN constructors as C
ON sub_q2.constructorId = C.constructorId
WHERE sub_q2.year!=2022
GROUP BY 3
Order by 3 desc
)
SELECT winner, COUNT(Season_points) as Season_wins
FROM Season_total
GROUP BY winner
ORDER BY Season_wins desc, year desc
""",conn)
championship_wins
| winner | Season_wins | |
|---|---|---|
| 0 | Ferrari | 22 |
| 1 | Mercedes | 9 |
| 2 | McLaren | 9 |
| 3 | Williams | 9 |
| 4 | Red Bull | 4 |
| 5 | BRM | 3 |
| 6 | Renault | 2 |
| 7 | Tyrrell | 2 |
| 8 | Brabham-Repco | 2 |
| 9 | Cooper-Climax | 2 |
| 10 | Brawn | 1 |
| 11 | Benetton | 1 |
| 12 | Team Lotus | 1 |
| 13 | Matra-Ford | 1 |
| 14 | Lotus-Ford | 1 |
| 15 | Lotus-Climax | 1 |
| 16 | Maserati | 1 |
| 17 | Alfa Romeo | 1 |
px.bar(championship_wins.head(), x='winner', y='Season_wins',labels={'winner':'Constructor'}, title = 'Constructors with the highest number of championship wins')